# Load the weather data and derive two categorical columns from it:
# an 8-point compass sector (from `wind_dir`) and a season (from `month`).
data(weather)

# Sectors are 45 degrees wide, so the boundaries between them sit at
# 22.5, 67.5, ..., 337.5. findInterval() counts how many boundaries are
# <= wind_dir (0 to 8) and that count, plus one, indexes the label vector.
# "N" appears at both ends because it wraps around: it covers everything
# below 22.5 as well as everything from 337.5 upwards.
# A missing wind_dir gives an NA index and therefore an NA label.
weather <- weather %>%
  mutate(
    wind_direction = c("N", "NE", "E", "SE", "S", "SW", "W", "NW", "N")[
      findInterval(wind_dir, seq(22.5, 337.5, by = 45)) + 1L
    ]
  ) %>%
  mutate(
    # A month that matches none of the branches is left as NA.
    season = case_when(
      month %in% c(12, 1, 2) ~ "Winter",
      month %in% 3:5 ~ "Spring",
      month %in% 6:8 ~ "Summer",
      month %in% 9:11 ~ "Fall"
    )
  )

Part I

Q1: Suspicious Records

# List the records flagged as suspicious: temperature outside [-20, 100]
# or wind speed above 50.
weather %>%
  filter(temp < -20 | temp > 100 | wind_speed > 50) %>%
  select(month, day, hour, temp, wind_speed)
## # A tibble: 3 × 5
##   month   day  hour  temp wind_speed
##   <int> <int> <int> <dbl>      <dbl>
## 1     2    12     3  39.0    1048.  
## 2     7    18    15 100.        9.21
## 3     7    19    16 100.       20.7
# Histograms of both variables, drawn before any values are blanked.
ggplot(weather, aes(x = temp)) +
  geom_histogram(bins = 50) +
  labs(title = "Temperature Distribution")

ggplot(weather, aes(x = wind_speed)) +
  geom_histogram(bins = 50) +
  labs(title = "Wind Speed Distribution")

# Blank out the flagged values (same thresholds as the filter above).
# which() drops NA comparisons, so values that are already missing are
# simply left as they are.
weather <- weather %>%
  mutate(
    temp = replace(temp, which(temp < -20 | temp > 100), NA),
    wind_speed = replace(wind_speed, which(wind_speed > 50), NA)
  )

Q2: Wind Direction

# Count observations per compass sector; useNA = "ifany" adds an <NA>
# column when some observations have no direction.
table(weather[["wind_direction"]], useNA = "ifany")
## 
##    E    N   NE   NW    S   SE   SW    W <NA> 
## 1383 4163 2459 4432 3908 1331 3822 4157  460
# Bar chart of the same counts, with the missing directions left out.
weather %>%
  filter(!is.na(wind_direction)) %>%
  ggplot(aes(x = wind_direction)) +
  geom_bar() +
  theme_minimal() +
  labs(title = "Wind Direction Frequency")

Q3: Wind Direction by Season

# Side-by-side bars: one cluster per season, one bar per wind direction.
# Observations without a wind direction are excluded.
weather %>%
  filter(!is.na(wind_direction)) %>%
  ggplot(aes(x = season, fill = wind_direction)) +
  geom_bar(position = position_dodge()) +
  theme_minimal()

Q4: Temperature Distribution

# Compare the temperature distribution with a normal distribution.
# The histogram is drawn on the density scale so that it shares a y axis
# with the two overlaid curves:
#   red  - normal density using the sample mean and standard deviation
#   blue - kernel density estimate of the data
ggplot(weather, aes(x = temp)) +
  # after_stat(density) selects the density computed by the histogram
  # stat instead of the default count.
  geom_histogram(aes(y = after_stat(density)), bins = 50) +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(weather$temp, na.rm = TRUE),
      sd = sd(weather$temp, na.rm = TRUE)
    ),
    color = "red"
  ) +
  geom_density(color = "blue") +
  theme_minimal()

# Normal QQ plot of temperature with its reference line.
ggplot(weather, aes(sample = temp)) +
  stat_qq() +
  stat_qq_line() +
  ggtitle("QQ Plot")

Q5: Temperature by Season

# Boxplot of temperature per season, annotated with two pairwise
# comparisons: Summer vs Winter and Spring vs Fall.
weather %>%
  ggplot(aes(x = season, y = temp)) +
  geom_boxplot() +
  stat_compare_means(
    comparisons = list(
      c("Summer", "Winter"),
      c("Spring", "Fall")
    )
  )

Part II

# Collapse the records into one row per (month, day): mean/min/max
# temperature, mean wind speed, the day's season and a Date column.
# Missing values are ignored in every summary.
daily <- weather %>%
  group_by(month, day) %>%
  summarise(
    mean_temp = mean(temp, na.rm = TRUE),
    min_temp = min(temp, na.rm = TRUE),
    max_temp = max(temp, na.rm = TRUE),
    mean_wind = mean(wind_speed, na.rm = TRUE),
    # Season depends only on month, so any row of the group will do.
    season = season[1],
    # NOTE(review): the year is hard-coded as 2013 - confirm the data
    # covers only that year.
    date = as.Date(sprintf("2013-%d-%d", first(month), first(day))),
    .groups = "drop"
  )

Q6: Max vs Min Temperature

# Daily maximum against daily minimum temperature, points coloured by
# season. group = 1 makes the linear fit use all points together rather
# than one line per season; the fit is drawn in black without a band.
daily %>%
  ggplot(aes(x = min_temp, y = max_temp, color = season)) +
  geom_point() +
  geom_smooth(
    aes(group = 1),
    method = "lm",
    se = FALSE,
    color = "black"
  )

Q7: Wind Speed vs Temperature

# Daily mean wind speed against daily mean temperature. Because colour
# is mapped to season for the whole plot, the smoother is fitted
# separately for each season; confidence bands are switched off.
daily %>%
  ggplot(aes(x = mean_temp, y = mean_wind, color = season)) +
  geom_point() +
  geom_smooth(se = FALSE)

Q8: Ratio vs Difference

# Two measures of the daily temperature spread: max/min and max - min.
daily <- mutate(
  daily,
  ratio = max_temp / min_temp,
  diff = max_temp - min_temp
)

# Left panel: boxplot of the ratio per season.
p1 <- daily %>%
  ggplot(aes(x = season, y = ratio)) +
  geom_boxplot()

# Right panel: histogram of the difference, one facet per season laid
# out in two rows.
p2 <- daily %>%
  ggplot(aes(x = diff)) +
  geom_histogram(bins = 30) +
  facet_wrap(~season, nrow = 2)

ggarrange(p1, p2, ncol = 2)

Q9: Line Plot

# Daily mean, minimum and maximum temperature over time. Mapping colour
# to a constant string in each layer gives every line its own legend
# entry ("Mean", "Min", "Max").
p <- daily %>%
  ggplot(aes(x = date)) +
  geom_line(aes(y = mean_temp, color = "Mean")) +
  geom_line(aes(y = min_temp, color = "Min")) +
  geom_line(aes(y = max_temp, color = "Max")) +
  theme_minimal()

# Interactive version of the plot.
ggplotly(p)

# Animated version: the lines are revealed progressively along `date`.
p_anim <- p + transition_reveal(date)
animate(p_anim)

Q10: Polar Area Chart

# Lowest and highest temperature observed in each month.
monthly <- weather %>%
  group_by(month) %>%
  summarise(
    min_temp = min(temp, na.rm = TRUE),
    max_temp = max(temp, na.rm = TRUE),
    .groups = "drop"
  )

# Polar bar chart with one wedge per month. The light bar has height
# min_temp; the dark bar has height (max_temp - min_temp) and is shifted
# up by each month's minimum. The shift vector is taken from `monthly`
# in row order, so it lines up with the plotted rows only while the plot
# data is `monthly` itself, unsorted and unfiltered.
monthly %>%
  ggplot(aes(x = factor(month))) +
  geom_col(aes(y = min_temp), fill = "lightblue") +
  geom_col(
    aes(y = max_temp - min_temp),
    position = position_nudge(y = monthly$min_temp),
    fill = "darkblue"
  ) +
  coord_polar() +
  theme_minimal()